/*

 * Licensed to the Apache Software Foundation (ASF) under one

 * or more contributor license agreements.  See the NOTICE file

 * distributed with this work for additional information

 * regarding copyright ownership.  The ASF licenses this file

 * to you under the Apache License, Version 2.0 (the

 * "License"); you may not use this file except in compliance

 * with the License.  You may obtain a copy of the License at

 *

 *     http://www.apache.org/licenses/LICENSE-2.0

 *

 * Unless required by applicable law or agreed to in writing, software

 * distributed under the License is distributed on an "AS IS" BASIS,

 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

 * See the License for the specific language governing permissions and

 * limitations under the License.

 */

package com.bff.gaia.unified.sdk.extensions.sql.impl.transform.agg;



import com.bff.gaia.unified.sdk.annotations.Internal;

import com.bff.gaia.unified.sdk.coders.Coder;

import com.bff.gaia.unified.sdk.coders.CoderRegistry;

import com.bff.gaia.unified.sdk.coders.SerializableCoder;

import com.bff.gaia.unified.sdk.extensions.sql.impl.utils.BigDecimalConverter;

import com.bff.gaia.unified.sdk.schemas.Schema;

import com.bff.gaia.unified.sdk.transforms.Combine;

import com.bff.gaia.unified.sdk.transforms.SerializableFunction;

import org.apache.calcite.runtime.SqlFunctions;



import java.math.BigDecimal;

import java.math.MathContext;

import java.math.RoundingMode;

import java.util.stream.StreamSupport;



/**
 * {@link Combine.CombineFn} for <em>Variance</em> on {@link Number} types.
 *
 * <p>Calculates Population Variance and Sample Variance using incremental formulas described, for
 * example, by Chan, Golub, and LeVeque in "Algorithms for computing the sample variance: analysis
 * and recommendations", The American Statistician, 37 (1983) pp. 242--247.
 *
 * <p>If variance is defined like this:
 *
 * <ul>
 *   <li>Input elements: {@code (x[1], ... , x[n])}
 *   <li>Sum of elements: {@code sum(x) = x[1] + ... + x[n]}
 *   <li>Average of all elements in the input: {@code mean(x) = sum(x) / n}
 *   <li>Deviation of {@code i}th element from the current mean: {@code deviation(x, i) = x[i] -
 *       mean(x)}
 *   <li>Variance: {@code variance(x) = deviation(x, 1)^2 + ... + deviation(x, n)^2}. Note that
 *       this is the <em>un-normalized</em> sum of squared deviations; it is divided by the
 *       (adjusted) element count only when the final output is extracted.
 * </ul>
 *
 * <p>Then variance of combined input of 2 samples {@code (x[1], ... , x[n])} and {@code (y[1], ...
 * , y[m])} is calculated using this formula:
 *
 * <ul>
 *   <li>{@code variance(concat(x,y)) = variance(x) + variance(y) + increment}, where:
 *   <li>{@code increment = n/(m(m+n)) * (m/n * sum(x) - sum(y))^2}, which is the same as {@code
 *       n*m/(n+m) * (mean(x) - mean(y))^2}
 * </ul>
 *
 * <p>This is also applicable for a single element increment, assuming that variance of a single
 * element input is zero.
 *
 * <p>To implement the above formula the accumulator ({@link VarianceAccumulator}) keeps track of
 * the current variation, sum, and count of elements; this class feeds it new elements, merges
 * accumulators through {@code VarianceAccumulator.combineWith}, and normalizes the final result.
 *
 * <p>The result is undefined when there are too few non-null inputs (none for population variance,
 * fewer than two for sample variance). In that case {@link #extractOutput} returns {@code null}
 * (SQL {@code NULL}) instead of failing with a division by zero.
 */
@Internal
public class VarianceFn<T extends Number> extends Combine.CombineFn<T, VarianceAccumulator, T> {

  /** Precision and rounding applied to the final division in {@link #getVariance}. */
  static final MathContext MATH_CTX = new MathContext(10, RoundingMode.HALF_UP);

  private static final boolean SAMPLE = true;
  private static final boolean POP = false;

  /** {@code true} to produce sample variance (divide by n - 1), {@code false} for population. */
  private final boolean isSample;

  /** Converts the {@link BigDecimal} result into the output type {@code T}. */
  private final SerializableFunction<BigDecimal, T> decimalConverter;

  /**
   * Creates a population-variance function whose output converter is obtained from {@code
   * BigDecimalConverter.forSqlType(typeName)}.
   *
   * @param typeName schema type of the aggregated field
   * @return a new population-variance {@code VarianceFn}
   */
  public static <V extends Number> VarianceFn newPopulation(Schema.TypeName typeName) {
    return newPopulation(BigDecimalConverter.forSqlType(typeName));
  }

  /**
   * Creates a population-variance function (sum of squared deviations divided by {@code n}).
   *
   * @param decimalConverter converts the computed {@link BigDecimal} variance to the output type
   * @return a new population-variance {@code VarianceFn}
   */
  public static <V extends Number> VarianceFn newPopulation(
      SerializableFunction<BigDecimal, V> decimalConverter) {

    return new VarianceFn<>(POP, decimalConverter);
  }

  /**
   * Creates a sample-variance function whose output converter is obtained from {@code
   * BigDecimalConverter.forSqlType(typeName)}.
   *
   * @param typeName schema type of the aggregated field
   * @return a new sample-variance {@code VarianceFn}
   */
  public static <V extends Number> VarianceFn newSample(Schema.TypeName typeName) {
    return newSample(BigDecimalConverter.forSqlType(typeName));
  }

  /**
   * Creates a sample-variance function (sum of squared deviations divided by {@code n - 1}).
   *
   * @param decimalConverter converts the computed {@link BigDecimal} variance to the output type
   * @return a new sample-variance {@code VarianceFn}
   */
  public static <V extends Number> VarianceFn newSample(
      SerializableFunction<BigDecimal, V> decimalConverter) {

    return new VarianceFn<>(SAMPLE, decimalConverter);
  }

  private VarianceFn(boolean isSample, SerializableFunction<BigDecimal, T> decimalConverter) {
    this.isSample = isSample;
    this.decimalConverter = decimalConverter;
  }

  /** Returns an accumulator representing zero elements. */
  @Override
  public VarianceAccumulator createAccumulator() {
    return VarianceAccumulator.ofZeroElements();
  }

  /**
   * Folds one input element into the accumulator.
   *
   * @param currentVariance accumulator built so far
   * @param rawInput next element; {@code null} inputs are ignored
   * @return {@code currentVariance} unchanged for a {@code null} input, otherwise the result of
   *     combining it with a single-element accumulator for {@code rawInput}
   */
  @Override
  public VarianceAccumulator addInput(VarianceAccumulator currentVariance, T rawInput) {

    if (rawInput == null) {
      // Null elements do not take part in the aggregation.
      return currentVariance;
    }

    return currentVariance.combineWith(
        VarianceAccumulator.ofSingleElement(SqlFunctions.toBigDecimal(rawInput)));
  }

  /**
   * Merges partial accumulators into one by successively combining them, starting from an
   * accumulator of zero elements.
   *
   * @param variances partial accumulators to merge
   * @return the combined accumulator
   */
  @Override
  public VarianceAccumulator mergeAccumulators(Iterable<VarianceAccumulator> variances) {
    return StreamSupport.stream(variances.spliterator(), false)
        .reduce(VarianceAccumulator.ofZeroElements(), VarianceAccumulator::combineWith);
  }

  /**
   * Returns a {@link SerializableCoder} for {@link VarianceAccumulator}; the registry and the input
   * coder are not consulted.
   */
  @Override
  public Coder<VarianceAccumulator> getAccumulatorCoder(
      CoderRegistry registry, Coder<T> inputCoder) {
    return SerializableCoder.of(VarianceAccumulator.class);
  }

  /**
   * Produces the final variance converted to the output type.
   *
   * @param accumulator accumulator holding the aggregated state
   * @return the converted variance, or {@code null} when the variance is undefined for the number
   *     of accumulated elements (see {@link #getVariance})
   */
  @Override
  public T extractOutput(VarianceAccumulator accumulator) {
    BigDecimal variance = getVariance(accumulator);
    if (variance == null) {
      // Skip the converter: it is not known to accept null input.
      return null;
    }
    return decimalConverter.apply(variance);
  }

  /**
   * Normalizes the accumulated sum of squared deviations by the element count ({@code n} for
   * population variance, {@code n - 1} for sample variance).
   *
   * @return the variance rounded according to {@link #MATH_CTX}, or {@code null} when the divisor
   *     is not positive, i.e. there are no elements, or a single element for sample variance
   */
  private BigDecimal getVariance(VarianceAccumulator variance) {

    BigDecimal adjustedCount =
        this.isSample ? variance.count().subtract(BigDecimal.ONE) : variance.count();

    // A zero divisor would make BigDecimal.divide throw ArithmeticException and a negative one
    // would give a meaningless result; the variance is undefined in both cases.
    if (adjustedCount.signum() <= 0) {
      return null;
    }

    return variance.variance().divide(adjustedCount, MATH_CTX);
  }
}